#####
# author: Derek Norton
# date: 2024-01-16
# client: Cristalyne Bell, Jon Temte, Maureen Goss, Cecilia He
# purpose: HMPV script for sharing
# notes:
# syntax: window
#####

rm(list = ls())

library(dplyr)
library(tidyr)
library(magrittr)
library(ggplot2)
library(lme4)
library(tableone)



### load data

fam.set <-
  read.csv('outputs/tables/deident_hmpv_family_set_data_for_regression.csv') %>%
  as_tibble()

ind.set <-
  read.csv('outputs/tables/deident_hmpv_individual_set_data_for_SIR_and_regression.csv') %>%
  as_tibble()



### functions for diagnostics

pearson_X2 <- function (amod) {
  sum(residuals(amod, type = "pearson") ^ 2)
}
# pearson X^2 statistic; any model

disp_check <- function (amod) {
  pearson_X2(amod) / df.residual(amod)
}
# estimate dispersion parameter; generalized models



### family level model

fam.mod <-
  glm(prop_opps_infected ~ memb_per_bdrm + index_sev + index_age, data = fam.set,
    family = binomial, weights = fam_infect_opps)
# no errors or warnings

summary(fam.mod)
# older index => more spread

disp_check(fam.mod)
# looks okay
# there is a minor overfitting concern though, due to relatively small number
#   of cases after the index



### individual level set and model

ind.mod2 <-
  glmer(hmpv ~ memb_per_bdrm + index_sev + + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial)
# no errors or warngins

disp_check(ind.mod2)
# potentially underdisperse...

summary(ind.mod2)
# significance around ages



### univariate items and graphics

fam.set %>%
  ggplot(aes(x = index_age, y = prop_opps_infected)) +
  geom_smooth(method = 'lm') +
  geom_jitter(alpha = 0.3) +
  xlab('Age of index case in household') +
  ylab('Proportion of household members HMPV+\n(excluding index)')

ggsave(filename = 'outputs/figures/index case age vs prop of household infected (linear) no title.png',
  width = 6.5, height = 4)
ggsave(filename = 'outputs/figures/Figure 1.tiff', dpi = 1200,
  width = 5.5, height = 3.5)


ind.set %>%
  filter(!index_case) %>%
  .$hmpv %>%
  {binom.test(x = sum(.), n = length(.))}
# SIR and CI



### regression tables

tmp <-
summary(ind.mod2)$coef
tmp %>%
  as_tibble %>%
  mutate(Covar = rownames(tmp)) %>%
  rename(SE = `Std. Error`,
    p_value = `Pr(>|z|)`) %>%
  mutate(OR = exp(Estimate),
    ORlcl = exp(Estimate - 1.96 * SE) %>% round(digits = 2),
    ORucl = exp(Estimate + 1.96 * SE) %>% round(digits = 2),
    OR_CI = paste0(ORlcl, ' \u2014 ', ORucl)) %>%
  select(Covar, Estimate, SE, `z value`, OR, OR_CI, p_value) %>%
  write.csv(file = 'individual model summary.csv')
rm(tmp)


tmp <-
  summary(fam.mod)$coef
tmp %>%
  as_tibble %>%
  mutate(Covar = rownames(tmp)) %>%
  rename(SE = `Std. Error`,
    p_value = `Pr(>|z|)`) %>%
  mutate(OR = exp(Estimate),
    ORlcl = exp(Estimate - 1.96 * SE) %>% round(digits = 2),
    ORucl = exp(Estimate + 1.96 * SE) %>% round(digits = 2),
    OR_CI = paste0(ORlcl, ' \u2014 ', ORucl)) %>%
  select(Covar, Estimate, SE, `z value`, OR, OR_CI, p_value) %>%
  write.csv(file = 'family model summary.csv')
rm(tmp)


### symptoms exchange for severity in regressions
# fever, cough, rhino, malaise, nascon

s.fam.ls <-
  list(
    fever = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_fever + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),
    
    cough = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_cough + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),
    
    rhino = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_rhino + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),

    malaise = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_malaise + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps),

    nascon = glm(formula = prop_opps_infected ~ memb_per_bdrm + index_nascon + 
    index_age, family = binomial, data = fam.set, weights = fam_infect_opps)
  )
# replace severity with individual symptoms present

lapply(s.fam.ls, function(amod) {summary(amod)$coef[3,]})
# no significance


s.ind.ls <-
  list(
    fever = glmer(hmpv ~ memb_per_bdrm + index_fever + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial),
    
    cough = glmer(hmpv ~ memb_per_bdrm + index_cough + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial),
    
    rhino = glmer(hmpv ~ memb_per_bdrm + index_rhino + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial),

    malaise = glmer(hmpv ~ memb_per_bdrm + index_malaise + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial),

    nascon = glmer(hmpv ~ memb_per_bdrm + index_nascon + index_age + age + (1 | record_id),
    data = filter(ind.set, !index_case), family = binomial)
  )
# replace severity with individual symptoms present

lapply(s.ind.ls, function(amod) {summary(amod)$coef[3,]})
# no significance



### time from index to next cases

ind.set %>%
  filter(hmpv, !index_case) %>%
  mutate(st_dt = as.Date(st_dt, format = '%m/%d/%Y'),
    first_hmpv_dt = as.Date(first_hmpv_dt, format = '%m/%d/%Y'),
    tthmpv = as.numeric(st_dt - first_hmpv_dt)) %>%
  .$tthmpv %T>%
  {print(summary(.))} %>%
  sd(na.rm = T)



### regression subject demog tables

ind.set %>%
  select(index_case, age, child, gender, relationship, memb_per_bdrm, hmpv, onset) %T>%
  {print(range(.$onset, na.rm = T))} %>%
  CreateTableOne(data = .) %>%
  {write.csv(print(.), file = 'individuals model demogs.csv')}
# individ demogs

fam.mod$data %>%
  na.omit %>%
  select(fam_infect_opps, prop_opps_infected, memb_per_bdrm, contains('index_')) %>%
  mutate(index_sev = factor(index_sev)) %>%
  CreateTableOne(data = .) %>%
  {write.csv(print(.), file = 'family model demogs.csv')}
# family demogs
